In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the long-format tick data (one row per ticker per timestamp).
# NOTE(review): absolute local path kept as-is; consider a configurable DATA_DIR.
a = pd.read_csv("C://Users/zayt/Downloads/all/all_ticks_long.csv")
print(a.isna().sum())

# The six tickers analysed in this notebook.
selected_stocks = ['EREGL', 'TUPRS', 'YKBNK', 'AEFES', 'ARCLK', 'THYAO']

# Parse timestamps once, then expose the calendar parts as plain columns so
# later cells can filter and group on them.
a['timestamp'] = pd.to_datetime(a['timestamp'])
timestamp_parts = a['timestamp'].dt
for column in ('Year', 'Month', 'Day', 'Hour', 'Minute', 'Second'):
    a[column] = getattr(timestamp_parts, column.lower())
print(a.head())

# One chronologically sorted frame per ticker, driven by selected_stocks instead
# of six copy-pasted filters.
# NOTE(review): str.contains is a substring/regex match (kept from the original);
# switch to `== ticker` if short_name is confirmed to hold bare ticker symbols.
stock_frames = {
    ticker: a[a['short_name'].str.contains(ticker)]
    .sort_values(by='timestamp')
    .reset_index(drop=True)
    for ticker in selected_stocks
}
eregl = stock_frames['EREGL']
tuprs = stock_frames['TUPRS']
ykbnk = stock_frames['YKBNK']
aefes = stock_frames['AEFES']
arclk = stock_frames['ARCLK']
thyao = stock_frames['THYAO']

# Restrict every ticker to the years under study; the trailing-underscore names
# are the ones the plotting cells consume.
years_selected = [2017, 2018]
stock_frames_selected = {
    ticker: frame[frame['Year'].isin(years_selected)]
    for ticker, frame in stock_frames.items()
}
eregl_ = stock_frames_selected['EREGL']
tuprs_ = stock_frames_selected['TUPRS']
ykbnk_ = stock_frames_selected['YKBNK']
aefes_ = stock_frames_selected['AEFES']
arclk_ = stock_frames_selected['ARCLK']
thyao_ = stock_frames_selected['THYAO']
def group_by_month(df):
    """Split a DataFrame into one sub-frame per (Year, Month) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has 'Year' and 'Month' columns.

    Returns
    -------
    list of pandas.DataFrame
        One frame per distinct (Year, Month) present in ``df``. groupby sorts
        its keys by default, so the list is ordered by year, then month. An
        empty input yields an empty list.
    """
    # The group key is not needed by callers; each sub-frame still carries its
    # own 'Year' and 'Month' columns.
    return [group_df for _, group_df in df.groupby(['Year', 'Month'])]
short_name 0 timestamp 0 price 0 dtype: int64 short_name timestamp price Year Month Day Hour \ 0 AEFES 2012-09-17 06:45:00+00:00 22.3978 2012 9 17 6 1 AEFES 2012-09-17 07:00:00+00:00 22.3978 2012 9 17 7 2 AEFES 2012-09-17 07:15:00+00:00 22.3978 2012 9 17 7 3 AEFES 2012-09-17 07:30:00+00:00 22.3978 2012 9 17 7 4 AEFES 2012-09-17 07:45:00+00:00 22.5649 2012 9 17 7 Minute Second 0 45 0 1 0 0 2 15 0 3 30 0 4 45 0
In [ ]:
eregl_grouped = group_by_month(eregl_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in eregl_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in eregl_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# eregl_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('EREGL Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
tuprs_grouped = group_by_month(tuprs_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in tuprs_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in tuprs_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# tuprs_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('TUPRS Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
ykbnk_grouped = group_by_month(ykbnk_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in ykbnk_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in ykbnk_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# ykbnk_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('YKBNK Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
aefes_grouped = group_by_month(aefes_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in aefes_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in aefes_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# aefes_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('AEFES Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
arclk_grouped = group_by_month(arclk_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in arclk_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in arclk_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# arclk_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('ARCLK Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
thyao_grouped = group_by_month(thyao_)

# group_by_month returns one frame per (Year, Month), ordered by year then
# month, so a single pass yields the per-month price lists and matching labels
# without rescanning every group for each calendar month.
prices_by_month = [group['price'].tolist() for group in thyao_grouped]
month_labels = [
    f"{group['Year'].iloc[0]}/{group['Month'].iloc[0]:02d}"
    for group in thyao_grouped
]

# One box per month that actually has data.
plt.figure(figsize=(12, 6))
plt.boxplot(prices_by_month)
plt.xticks(range(1, len(month_labels) + 1), month_labels, rotation=45)
# thyao_ is restricted to 2017 and 2018, so the title must state that range.
plt.title('THYAO Stock Prices (2017-2018) by Month')
plt.xlabel('Month')
plt.ylabel('Price')
plt.grid(True)
plt.show()
In [ ]:
# Function to identify and plot outliers for each company
def create_control_charts(dataframes, n_sigma=3):
    """Draw a control chart for every company-month that contains outliers.

    For each frame in ``dataframes`` the rows are grouped by ('Year', 'Month').
    Within a month, control limits are ``mean +/- n_sigma * std`` of 'price';
    rows outside those limits are outliers. Months without outliers are skipped;
    for the rest, one figure is shown with the price line, the mean, both
    control limits and the outlier points.

    Parameters
    ----------
    dataframes : iterable of pandas.DataFrame
        Each frame needs 'Year', 'Month', 'price', 'timestamp' and
        'short_name' columns.
    n_sigma : float, default 3
        Half-width of the control band in standard deviations.

    Returns
    -------
    None
        The function only produces plots.
    """
    for company_df in dataframes:
        for (year, month), month_data in company_df.groupby(['Year', 'Month']):
            prices = month_data['price']
            mean = prices.mean()
            # A single-row month has std NaN; both comparisons below are then
            # False, so the month is treated as having no outliers.
            std_dev = prices.std()
            lower_bound = mean - n_sigma * std_dev
            upper_bound = mean + n_sigma * std_dev

            outliers = month_data[(prices < lower_bound) | (prices > upper_bound)]
            if outliers.empty:
                continue

            plt.figure(figsize=(12, 6))
            plt.plot(month_data['timestamp'], prices, label='Data Line')
            plt.axhline(y=upper_bound, color='r', linestyle='--', label='Upper Control Limit')
            plt.axhline(y=lower_bound, color='r', linestyle='--', label='Lower Control Limit')
            plt.axhline(y=mean, color='g', linestyle='--', label='Mean')
            plt.scatter(outliers['timestamp'], outliers['price'], color='red', label='Outliers', zorder=5)
            # Include the month in the title; otherwise every chart of a company
            # is labelled identically and cannot be told apart.
            plt.title(
                f'{company_df["short_name"].iloc[0]} Control Chart with Outliers '
                f'({year}/{month:02d})'
            )
            plt.xlabel('Timestamp')
            plt.ylabel('Price')
            plt.xticks(rotation=45)
            plt.legend()
            plt.grid(True)
            plt.show()
if __name__ == "__main__":
    # The per-ticker frames were built from the already loaded and preprocessed
    # data in the first cell. The CSV is deliberately not read again here: doing
    # so rebinds `a` to a raw frame without the Year/Month columns and
    # contributes nothing to the charts below.
    selected_companies = [eregl, tuprs, ykbnk, aefes, arclk, thyao]
    create_control_charts(selected_companies)